#===============================================================================
# The following analyses were carried out using R version 4.0.3
#===============================================================================
# Install packages
# install.packages("tidyverse")
# install.packages("lme4")
# install.packages("stargazer")
# install.packages("dplyr")
#-------------------------------------------------------------------------------
#Load packages
#-------------------------------------------------------------------------------
rm(list = ls())
library(tidyverse)
library(lme4)
library(stargazer)
#===============================================================================
# set working directory
setwd(" ")

# read data
mydata<-read.csv("FixedAssets&Corruption.csv")

#===============================================================================
# Appendix A: Diagnostics of Interaction Models
#===============================================================================
#install "interflex"

#packageurl <- "https://cran.r-project.org/src/contrib/Archive/interflex/interflex_1.1.3.tar.gz"
#install.packages(packageurl, repos=NULL, type="source")

library("interflex")

#-------------------------------------------------------------------------------
# Figures A, B, C & D: Raw Data Plot
#-------------------------------------------------------------------------------
# LSC
raw_lsc <- inter.raw(Y = "letcs", D = "lintensity_investment_employment", X = "LSC", 
                     data = mydata, weights = NULL, Ylabel = "ETCs", 
                     Dlabel = "Fixed-Asset Intensity", Xlabel="LSC")
ggsave("raw_lsc.pdf",
       device = "pdf",width=10,height=5)

# Trust in Courts
raw_court <- inter.raw(Y = "letcs", D = "lintensity_investment_employment", X = "TrustCourts",
                       data = mydata, weights = NULL, Ylabel = "ETCs", 
                       Dlabel = "Fixed-Asset Intensity", Xlabel="Trust in Courts")
ggsave("raw_court.pdf",
       device = "pdf",width=10,height=5)

# Bureaucratic Integration
raw_bi <- inter.raw(Y = "letcs", D = "lintensity_investment_employment", X = "BI_mc",
                    data = mydata, weights = NULL, Ylabel = "ETCs", 
                    Dlabel = "Fixed-Asset Intensity", Xlabel="Bureaucratic Integration")
ggsave("raw_bi.pdf",
       device = "pdf",width=10,height=5)

# Enforcement
raw_enforce <- inter.raw(Y = "letcs", D = "lintensity_investment_employment", X = "enforce",
                         data = mydata, weights = NULL, Ylabel = "ETCs", 
                         Dlabel = "Fixed-Asset Intensity", Xlabel="Enforcement (Factor Scores)")
ggsave("raw_enforce.pdf",
       device = "pdf",width=10,height=5)

#-------------------------------------------------------------------------------
# Figure E: Plot of Marginal Effects from Kernel Regression
#-------------------------------------------------------------------------------
# LSC
set.seed(123111)
kernel_lsc <- inter.kernel(Y = "letcs", D = "lintensity_investment_employment", X = "LSC", 
                           data = mydata,
                           Z = c("marketsize","growthrate", "lscale","soe_influence", "mgovthelp", "mtaxrate",
                                 "gdpper_1999_2003", "pop","soe", "collective" , 
                                 "private", "foreign", "lage", "lemp",
                                 "sales_otherprov", "govtsales", "soesales", "relationship", 
                                 "licenses", "interaction"),
                           bw = (max(mydata$LSC,na.rm=T)-min(mydata$LSC,na.rm=T))/4,
                           Dlabel = "Fixed-Asset Intensity",
                           Xlabel = "LSC (Mean Centered)",
                           Ylabel = "ETCs",
                           na.rm = TRUE)
ggsave("kernel_lsc.pdf",
       device = "pdf",width=8,height=5)

# Trust in Courts
kernel_court <- inter.kernel(Y = "letcs", D = "lintensity_investment_employment", 
                             X = "TrustCourts", 
                             Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                                   "gdpper_1999_2003", "pop","soe", "collective" , 
                                   "private", "foreign", "lage", "lemp",
                                   "sales_otherprov", "govtsales", "soesales", "relationship", 
                                   "licenses", "interaction"),
                             data = mydata,
                             bw = (max(mydata$TrustCourts,na.rm=T)-min(mydata$TrustCourts,na.rm=T))/4, #0.265242,
                             Dlabel = "Fixed-Asset Intensity",
                             Xlabel = "Trust in Courts (Mean Centered)",
                             Ylabel = "ETCs",
                             na.rm = TRUE)
ggsave("kernel_court.pdf",
       device = "pdf",width=8,height=5)

# Bureaucratic Integration
kernel_bi <- inter.kernel(Y = "letcs", D = "lintensity_investment_employment", 
                          X = "BI_mc", 
                          Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                                "gdpper_1999_2003", "pop","soe", "collective" , 
                                "private", "foreign", "lage", "lemp",
                                "sales_otherprov", "govtsales", "soesales", "relationship", 
                                "licenses", "interaction"),
                          data = mydata, 
                          bw = (max(mydata$BI_mc,na.rm=T)-min(mydata$BI_mc,na.rm=T))/4,
                          Dlabel = "Fixed-Asset Intensity",
                          Xlabel = "Bureaucratic Integration (Mean Centered)",
                          Ylabel = "ETCs",
                          na.rm = TRUE)
ggsave("kernel_bi.pdf",
       device = "pdf",width=8,height=5)

# Enforcement
kernel_enforce <- inter.kernel(Y = "letcs", D = "lintensity_investment_employment", 
                               X = "enforce", 
                               Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                                     "gdpper_1999_2003", "pop","soe", "collective" , 
                                     "private", "foreign", "lage", "lemp",
                                     "sales_otherprov", "govtsales", "soesales", "relationship", 
                                     "licenses", "interaction"),
                               data = mydata, 
                               bw = (max(mydata$enforce,na.rm=T)-min(mydata$enforce,na.rm=T))/4,
                               Dlabel = "Fixed-Asset Intensity",
                               Xlabel = "Enforcement (Factor Scores)",
                               Ylabel = "ETCs",
                               na.rm = TRUE)
ggsave("kernel_enforce.pdf",
       device = "pdf",width=8,height=5)

#-------------------------------------------------------------------------------
# Three Bin Model
#-------------------------------------------------------------------------------
# LSC
out_lsc <- interflex(Y = "letcs", D = "lintensity_investment_employment", 
                   X = "LSC", 
                   Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                         "gdpper_1999_2003", "pop","soe", "collective" , 
                         "private", "foreign", "lage", "lemp",
                         "sales_otherprov", "govtsales", "soesales", "relationship", 
                         "licenses", "interaction"), 
                   data = mydata, estimator = 'binning', vartype = "robust", main = "Marginal Effects",
                   na.rm = TRUE)

out_lsc$tests

# Trust in Courts
out_court <- interflex(Y = "letcs", D = "lintensity_investment_employment", 
                     X = "TrustCourts", 
                     Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                           "gdpper_1999_2003", "pop","soe", "collective" , 
                           "private", "foreign", "lage", "lemp",
                           "sales_otherprov", "govtsales", "soesales", "relationship", 
                           "licenses", "interaction"), 
                     data = mydata, estimator = 'binning', vartype = "robust", main = "Marginal Effects",
                     na.rm = TRUE)

out_court$tests


# Bureaucratic Integration
out_bi <- interflex(Y = "letcs", D = "lintensity_investment_employment", 
                       X = "BI_mc", 
                       Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                             "gdpper_1999_2003", "pop","soe", "collective" , 
                             "private", "foreign", "lage", "lemp",
                             "sales_otherprov", "govtsales", "soesales", "relationship", 
                             "licenses", "interaction"), 
                       data = mydata, estimator = 'binning', vartype = "robust", main = "Marginal Effects",
                       na.rm = TRUE)

out_bi$tests

# Enforcement
out_enforce <- interflex(Y = "letcs", D = "lintensity_investment_employment", 
                    X = "enforce", 
                    Z = c("marketsize","growthrate", "lscale", "soe_influence", "mgovthelp", "mtaxrate",
                          "gdpper_1999_2003", "pop","soe", "collective" , 
                          "private", "foreign", "lage", "lemp",
                          "sales_otherprov", "govtsales", "soesales", "relationship", 
                          "licenses", "interaction"), 
                    data = mydata, estimator = 'binning', vartype = "robust", main = "Marginal Effects",
                    na.rm = TRUE)

out_enforce$tests

#===============================================================================
# Appendix B: Figure F
#-------------------------------------------------------------------------------
# Note: use "IVregression.do" for the IV regression results reported in Tables A, B, C & D.
#===============================================================================

fai.data <-mydata%>%select(hydd,indcode2,lintensity_investment_employment,
                           log_fix_emp_mean)%>%distinct(hydd, .keep_all = T)

ggplot(fai.data, aes(x=lintensity_investment_employment, y=log_fix_emp_mean)) +
  geom_point(na.rm = T) + 
  geom_smooth(method = lm, se = FALSE, color = 'red',na.rm = T) + 
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10)) +  xlim(-1.5, 2) +
  labs(title="",  x="FAI (U.S.)", y="FAI (China)")

ggsave("FAIplot.pdf", device = "pdf",width=3.25,height=2.5)


fai.data2<-na.omit(fai.data)
fai.data2$USFAIrank<-group_indices(fai.data2, lintensity_investment_employment)
fai.data2$ChinaFAIrank<-group_indices(fai.data2, log_fix_emp_mean)

ggplot(fai.data2, aes(x=USFAIrank, y=ChinaFAIrank)) +geom_point(na.rm = T) + 
  geom_smooth(method = lm, se = FALSE, color = 'red',na.rm = T) + 
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10)) + 
  labs(title="",  x="FAI Ranking (U.S.)", y="FAI Ranking (China)")

ggsave("FAIRankingPlot.pdf", device = "pdf",width=3.25,height=2.5)

#---------------
# correlation
cor.test(fai.data$lintensity_investment_employment,fai.data$log_fix_emp_mean,
    method = "pearson", use = "complete.obs")

cor.test(fai.data2$USFAIrank,fai.data2$ChinaFAIrank,
         method = "pearson", use = "complete.obs")

#===============================================================================
# Appendix E: Results with Post-stratification Weights
#===============================================================================

# Table E 
base_1_pwt <- lm(letcs ~ 
               lintensity_investment_employment +
               marketsize + growthrate + lscale + soe_influence +
               mgovthelp + mtaxrate + 
               soe + collective + private + foreign  + 
               lage + lemp + sales_otherprov + govtsales + 
               soesales + relationship + licenses + interaction +
               + factor(province),data=mydata, weights = weights)
#------------
base_2_pwt <- lm(letcs ~ 
               lintensity_investment_employment +
               marketsize + growthrate + lscale + soe_influence +
               mgovthelp + mtaxrate + 
               soe + collective + private + foreign +
               lage + lemp + sales_otherprov + govtsales + 
               soesales + relationship + licenses + interaction + lceopay +
               factor(province),data=mydata, weights = weights)
#------------
base_3_pwt <- lm(letcs ~ 
              lintensity_investment_employment +
               marketsize + growthrate + lscale + soe_influence +
               mgovthelp + mtaxrate + 
               soe + collective + private + foreign +
               lage + lemp + sales_otherprov + govtsales + 
               soesales + relationship + licenses + interaction + gm_govt +
               factor(province),data=mydata, weights = weights)

#-------------------------------------------------------------------------------
# Calculate Clustered Standard Errors
#-------------------------------------------------------------------------------
#install.packages("lmtest")
#install.packages("sandwich")
library(lmtest)
library(sandwich)

CL.vcov<-function(model, cluster){
  #calculate degree of freedom adjustment
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- length(coef(model)) - M + 1
  dfc <- (M/(M-1))*((N-1)/(N-K))
  #calculate the uj's
  uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
  #use sandwich to get the var-covar matrix
  vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
  return(vcovCL)
}

#-------------------------------------------------------------------------------
base_1_vcovCL <- CL.vcov(base_1_pwt, base_1_pwt$model[,"factor(province)"])
base_1_CL <- coeftest(base_1_pwt, base_1_vcovCL, df=29)

base_2_vcovCL <- CL.vcov(base_2_pwt, base_2_pwt$model[,"factor(province)"])
base_2_CL <- coeftest(base_2_pwt, base_2_vcovCL, df=29)

base_3_vcovCL <- CL.vcov(base_3_pwt, base_3_pwt$model[,"factor(province)"])
base_3_CL <- coeftest(base_3_pwt, base_3_vcovCL, df=29)

stargazer(base_1_CL,base_2_CL,base_3_CL, omit= c("province"), keep.stat = ("n"),
          covariate.labels =c("Fixed-Asset Intensity (log)","Market Size","Growth Rate",
                              "Scale Economies (log)","SOE Dominance", "Government Help","Tax Burden",
                              "SOE","Collective","Private","Foreign","Age (log)",
                              "Employees (log)","Out-of-Province Sales","Proportion of Sales to Government",
                              "Proportion of Sales to SOEs","Years of Relationship",
                              "Licenses","Interaction with Government","CEO Pay (log)",
                              "Government Appointed General Manager"),no.space = TRUE)

#---------------------------------------------------------------------------------------------
# Table F in Appendix C

MLM_LSC_pwt <- lmer(letcs ~ 
                    lintensity_investment_employment +
                  marketsize + growthrate + lscale + soe_influence +
                  mgovthelp + mtaxrate  +
                  LSC + gdpper_1999_2003 + pop + 
                  soe + collective + private + foreign +
                  lage + lemp + sales_otherprov + govtsales + 
                  soesales + relationship + licenses + interaction+
                  LSC * lintensity_investment_employment + (1|province),
                  data = mydata, weights = weights)
#------------
MLM_court_pwt <- lmer(letcs ~ 
                        lintensity_investment_employment +
                    marketsize + growthrate + lscale + soe_influence +
                    mgovthelp + mtaxrate  +
                    TrustCourts + gdpper_1999_2003 + pop +
                    soe + collective + private + foreign +
                    lage + lemp + sales_otherprov + govtsales + 
                    soesales + relationship + licenses + interaction+
                    TrustCourts * lintensity_investment_employment + (1|province),
                    data = mydata, weights = weights)

#------------
MLM_bi_pwt <- lmer(letcs ~ 
                     lintensity_investment_employment +
                 marketsize + growthrate + lscale + soe_influence +
                 mgovthelp + mtaxrate  +
                 BI_mc + gdpper_1999_2003 + pop + 
                 soe + collective + private + foreign +
                 lage + lemp + sales_otherprov + govtsales + 
                 soesales + relationship + licenses + interaction+
                 BI_mc * lintensity_investment_employment + (1|province),
               data = mydata, weights = weights)


MLM_enforce_pwt <- lmer(letcs ~ 
                          lintensity_investment_employment +
                         marketsize + growthrate + lscale + soe_influence +
                         mgovthelp + mtaxrate  +
                         enforce + gdpper_1999_2003 + pop +
                         soe + collective + private + foreign +
                         lage + lemp + sales_otherprov + govtsales +
                         soesales + relationship + licenses + interaction+
                         enforce * lintensity_investment_employment + (1|province),
                       data = mydata, weights = weights)


stargazer(MLM_LSC_pwt,MLM_court_pwt,MLM_bi_pwt,MLM_enforce_pwt, keep.stat = ("n"),
          covariate.labels =c("Fixed-Asset Intensity (log)","Market Size","Growth Rate",
                              "Scale Economies (log)","SOE Dominance", "Government Help","Tax Burden",
                              "LSC","Trust in Courts","Bureaucratic Integration", 
                              "Enforcement (Factor Scores)",
                              "GDP per Capita (log)","Population",
                              "SOE","Collective","Private","Foreign","Age (log)",
                              "Employees (log)","Out-of-Province Sales","Proportion of Sales to Government",
                              "Proportion of Sales to SOEs","Years of Relationship",
                              "Licenses","Interaction with Government",
                              "FAI x LSC", "FAI x Trust in Courts",
                              "FAI x Bureaucratic Integration", "FAI x Enforcement (Factor Scores)"),no.space = TRUE)

#===============================================================================
# Appendix D: Entertaining, Travel, and Conference Expenses
#===============================================================================

# Table G 

rb_etcs1 <- lm(letcs2 ~ 
                 lintensity_investment_employment +
                 marketsize + growthrate + lscale + soe_influence +
                 mgovthelp + mtaxrate + 
                 soe + collective + private + foreign +
                 lage + lemp + sales_otherprov + govtsales + 
                 soesales + relationship + licenses + interaction +
                 + factor(province),data=mydata)
#------------
rb_etcs2 <- lm(letcs2 ~ 
                 lintensity_investment_employment +
                 marketsize + growthrate + lscale + soe_influence +
                 mgovthelp + mtaxrate + 
                 soe + collective + private + foreign +
                 lage + lemp + sales_otherprov + govtsales +
                 soesales + relationship + licenses + interaction + lceopay +
                 factor(province),data=mydata)
#------------
rb_etcs3 <- lm(letcs2 ~ 
                 lintensity_investment_employment +
                 marketsize + growthrate + lscale + soe_influence +
                 mgovthelp + mtaxrate + 
                 soe + collective + private + foreign +
                 lage + lemp + sales_otherprov + govtsales +
                 soesales + relationship + licenses + interaction + gm_govt +
                 factor(province),data=mydata)

#-------------------------------------------------------------------------------
# calculate clustered standard errors
#-------------------------------------------------------------------------------

rb_etcs1_vcovCL <- CL.vcov(rb_etcs1, rb_etcs1$model[,"factor(province)"])
rb_etcs1_CL <- coeftest(rb_etcs1, rb_etcs1_vcovCL, df=29)

rb_etcs2_vcovCL <- CL.vcov(rb_etcs2, rb_etcs2$model[,"factor(province)"])
rb_etcs2_CL <- coeftest(rb_etcs2, rb_etcs2_vcovCL, df=29)

rb_etcs3_vcovCL <- CL.vcov(rb_etcs3, rb_etcs3$model[,"factor(province)"])
rb_etcs3_CL <- coeftest(rb_etcs3, rb_etcs3_vcovCL, df=29)

stargazer(rb_etcs1_CL,rb_etcs2_CL,rb_etcs3_CL,  omit= c("province"), 
          covariate.labels =c("Fixed-Asset Intensity (log)","Market Size","Growth Rate",
                              "Scale Economies (log)","SOE Dominance", "Government Help","Tax Burden",
                              "SOE","Collective","Private","Foreign","Age (log)",
                              "Employees (log)","Out-of-Province Sales","Proportion of Sales to Government",
                              "Proportion of Sales to SOEs","Years of Relationship",
                              "Licenses","Interaction with Government","CEO Pay (log)",
                              "Government Appointed General Manager"),no.space = TRUE, keep.stat = c("n"))

#-------------------------------------------------------------------------------
#Table H
#-------------------------------------------------------------------------------
rb_MLM_LSC <- lmer(letcs2 ~ 
                     lintensity_investment_employment +
                       marketsize + growthrate + lscale + soe_influence +
                       mgovthelp + mtaxrate  +
                       LSC + gdpper_1999_2003 + pop + 
                       soe + collective + private + foreign +
                       lage + lemp + sales_otherprov + govtsales + 
                       soesales + relationship + licenses + interaction+
                       LSC * lintensity_investment_employment + (1|province),
                      data = mydata)
#------------
rb_MLM_court <- lmer(letcs2 ~ 
                       lintensity_investment_employment +
                         marketsize + growthrate + lscale + soe_influence +
                         mgovthelp + mtaxrate  +
                         TrustCourts + gdpper_1999_2003 + pop +
                         soe + collective + private + foreign +
                         lage + lemp + sales_otherprov + govtsales + 
                         soesales + relationship + licenses + interaction+
                         TrustCourts * lintensity_investment_employment + (1|province),
                         data = mydata)
#------------
rb_MLM_bi <- lmer(letcs2 ~ 
                    lintensity_investment_employment +
                       marketsize + growthrate + lscale + soe_influence +
                       mgovthelp + mtaxrate  +
                       BI_mc + gdpper_1999_2003 + pop +
                       soe + collective + private + foreign +
                       lage + lemp + sales_otherprov + govtsales + 
                       soesales + relationship + licenses + interaction+
                       BI_mc * lintensity_investment_employment + (1|province),
                       data = mydata)

rb_MLM_enforce <- lmer(letcs2 ~ 
                         lintensity_investment_employment +
                    marketsize + growthrate + lscale + soe_influence +
                    mgovthelp + mtaxrate  +
                    enforce + gdpper_1999_2003 + pop +
                    soe + collective + private + foreign +
                    lage + lemp + sales_otherprov + govtsales + 
                    soesales + relationship + licenses + interaction+
                    enforce * lintensity_investment_employment + (1|province),
                    data = mydata)

stargazer(rb_MLM_LSC,rb_MLM_court,rb_MLM_bi,rb_MLM_enforce, keep.stat = ("n"),
          covariate.labels =c("Fixed-Asset Intensity (log)","Market Size","Growth Rate",
                              "Scale Economies (log)","SOE Dominance", "Government Help","Tax Burden",
                              "LSC","Trust in Courts","Bureaucratic Integration", 
                              "Enforcement (Factor Scores)",
                              "GDP per Capita (log)","Population",
                              "SOE","Collective","Private","Foreign","Age (log)",
                              "Employees (log)","Out-of-Province Sales","Proportion of Sales to Government",
                              "Proportion of Sales to SOEs","Years of Relationship",
                              "Licenses","Interaction with Government",
                              "FAI x LSC", "FAI x Trust in Courts",
                              "FAI x Bureaucratic Integration", "FAI x Enforcement (Factor Scores)"),no.space = TRUE)

#===============================================================================
# Appendix F: Other Supplementary Information
#===============================================================================

# Table K: Coastal Provinces and Four Provincial municipalities

region_LSC <- lmer(letcs ~ 
                     lintensity_investment_employment +
                     marketsize + growthrate + lscale + soe_influence +
                     mgovthelp + mtaxrate  +
                     LSC + gdpper_1999_2003 + pop + coastal +
                     soe + collective + private + foreign +
                     lage + lemp + sales_otherprov + govtsales + 
                     soesales + relationship + licenses + interaction+
                     LSC * lintensity_investment_employment + (1|province),
                   data = mydata)
#------------
region_court <- lmer(letcs ~ 
                       lintensity_investment_employment +
                       marketsize + growthrate + lscale + soe_influence +
                       mgovthelp + mtaxrate  +
                       TrustCourts + gdpper_1999_2003 + pop + coastal +
                       soe + collective + private + foreign +
                       lage + lemp + sales_otherprov + govtsales + 
                       soesales + relationship + licenses + interaction+
                       TrustCourts * lintensity_investment_employment + (1|province),
                     data = mydata)

#------------
region_bi <- lmer(letcs ~ 
                    lintensity_investment_employment +
                    marketsize + growthrate + lscale + soe_influence +
                    mgovthelp + mtaxrate  +
                    BI_mc + gdpper_1999_2003 + pop + coastal +
                    soe + collective + private + foreign +
                    lage + lemp + sales_otherprov + govtsales + 
                    soesales + relationship + licenses + interaction+
                    BI_mc * lintensity_investment_employment + (1|province),
                  data = mydata)


#------------
region_enforce <- lmer(letcs ~ 
                         lintensity_investment_employment +
                         marketsize + growthrate + lscale + soe_influence +
                         mgovthelp + mtaxrate  +
                         enforce + gdpper_1999_2003 + pop + coastal +
                         soe + collective + private + foreign +
                         lage + lemp + sales_otherprov + govtsales + 
                         soesales + relationship + licenses + interaction+
                         enforce * lintensity_investment_employment + (1|province),
                       data = mydata)

stargazer(region_LSC,region_court,region_bi, region_enforce, omit = c("province"),
          covariate.labels =c("Fixed-Asset Intensity (log)","Market Size","Growth Rate",
                              "Scale Economies (log)","SOE Dominance", "Government Help","Tax Burden",
                              "LSC","Trust in Courts","Bureaucratic Integration", 
                              "Enforcement (Factor Scores)",
                              "GDP per Capita (log)","Population", "Municipalities & Coastal Prov",
                              "SOE","Collective","Private","Foreign","Age (log)",
                              "Employees (log)","Out-of-Province Sales","Proportion of Sales to Government",
                              "Proportion of Sales to SOEs","Years of Relationship",
                              "Licenses","Interaction with Government",
                              "FAI x LSC", "FAI x Trust in Courts",
                              "FAI x Bureaucratic Integration", "FAI x Enforcement (Factor Scores)"),
          no.space = TRUE)

#-------------------------------------------------------------------------------
# Table L: Descriptive Statistics
#-------------------------------------------------------------------------------
#install.packages("psych")
library(psych)
prov_ind.data <- mydata %>% dplyr::select(prov_ind, lnle_fix_emp_mean, 
                                          marketsize,growthrate,
                                          lscale, soe_influence, mgovthelp,mtaxrate,
                                          w_con_sal4,hhi) %>% 
  distinct(prov_ind, .keep_all = T)%>%dplyr::select(-prov_ind) 

ind_data <- mydata %>% dplyr::select(hydd,lintensity_investment_employment) %>%
  distinct(hydd, .keep_all = T) %>% dplyr::select(-hydd)

prov_data <- mydata %>% dplyr::select(province,LSC,TrustCourts, BI_mc, enforce, gdpper_1999_2003,pop) %>%
  distinct(province, .keep_all = T) %>% dplyr::select(-province)

data_summary_1 <- mydata %>% dplyr::select(letcs,
                                           soe,collective,private,foreign, 
                                           lage,lemp,sales_otherprov,govtsales,
                                           soesales,relationship,licenses,interaction,lceopay,gm_govt) %>% 
  describe(.) %>% as_tibble %>% 
  dplyr::select(n,mean,sd,median,min,max) %>% round(.,2) %>% as.data.frame() %>% 
  mutate(variable = rownames(.))

data_summary_2 <- ind_data %>% 
  describe(.) %>% as_tibble %>% 
  dplyr::select(n,mean,sd,median,min,max) %>% round(.,2) %>% as.data.frame() %>% 
  mutate(variable = rownames(.))

data_summary_3 <- prov_ind.data %>% 
  describe(.) %>% as_tibble %>% 
  dplyr::select(n,mean,sd,median,min,max) %>% round(.,2) %>% as.data.frame() %>% 
  mutate(variable = rownames(.))

data_summary_4 <- prov_data %>% 
  describe(.) %>% as_tibble %>% 
  dplyr::select(n,mean,sd,median,min,max) %>% round(.,2) %>% as.data.frame() %>% 
  mutate(variable = rownames(.))

data_summary <- bind_rows(list(data_summary_2,data_summary_3,data_summary_4,data_summary_1)) %>% 
  dplyr::select(variable,everything())

print(data_summary )
detach("package:psych")


#-------------------------------------------------------------------------------
# Table M: Industrial Distribution of Firms in the Sample
#-------------------------------------------------------------------------------
sample_ind_dist <- mydata %>% dplyr::select(hydd,industryname) %>% 
  group_by(hydd,industryname) %>% tally() %>% ungroup() %>% 
  mutate(pct = 100*n/sum(n)) %>% 
  mutate_if(is.numeric,round,2)

fai.us.log.mat <- mydata %>% dplyr::select(hydd,lintensity_investment_employment)%>%
  distinct(hydd, .keep_all = T)
fai.us.log<-as.vector(fai.us.log.mat[order(fai.us.log.mat$hydd),2])

sample_ind_dist2<-cbind(sample_ind_dist, fai.us.log)

sample_ind_dist2%>%mutate_if(is.numeric,round,2)

#-------------------------------------------------------------------------------
# Figure G: Plot of Fixed Asset Intensity (U.S. Data)
#-------------------------------------------------------------------------------
fai.us<-mydata%>%dplyr::select(hydd, industryname, intensity_investment_employment)%>%
  distinct(hydd, .keep_all = T)

ggplot(fai.us,aes(x=reorder(industryname, intensity_investment_employment),
                  y = intensity_investment_employment)) + 
  geom_bar(stat = "identity", position = 'dodge') +
  theme_bw() +
  xlab('Industry')+
  ylab('Fixed Asset Intensity') +
  coord_flip() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),axis.text=element_text(size=9))

ggsave('industry_fai.pdf',width = 11, height = 4,device=cairo_pdf)


#-------------------------------------------------------------------------------
# Table N: Geographical Distribution of Firms in the Sample
#-------------------------------------------------------------------------------
sample_geo_dist <- mydata %>% dplyr::select(province,city) %>% 
  group_by(province,city) %>% 
  tally() %>% arrange(province) %>% 
  ungroup() 

